GIS & Geospatial Data Analysis (Fall 2025)
readr: Read and write data
dplyr continued
ggplot: Data Visualization
Tidyverse coding style and packages
# Five universities, one row each, with founding year, present-day country,
# coordinates in decimal degrees, and whether the institution still exists.
# Built column-by-column; every vector lists the rows in the same order
# (Paris, Salerno, Reggio, Oxford, Bologna).
uni_df <- tibble::tibble(
  university      = c("Paris", "Salerno", "Reggio", "Oxford", "Bologna"),
  year            = c(1150, 1173, 1188, 1190, 1200),
  current_country = c("France", "Italy", "Italy", "United Kingdom", "Italy"),
  lat             = c(48.8566, 40.7711, 44.6450, 51.7520, 44.4989),
  lon             = c(2.3522, 14.7905, 10.9277, -1.2576, 11.3275),
  exist_today     = c(TRUE, TRUE, TRUE, TRUE, TRUE)
)
uni_df
| university | year | current_country | lat | lon | exist_today |
|---|---|---|---|---|---|
| Paris | 1150 | France | 48.8566 | 2.3522 | TRUE |
| Salerno | 1173 | Italy | 40.7711 | 14.7905 | TRUE |
| Reggio | 1188 | Italy | 44.6450 | 10.9277 | TRUE |
| Oxford | 1190 | United Kingdom | 51.7520 | -1.2576 | TRUE |
| Bologna | 1200 | Italy | 44.4989 | 11.3275 | TRUE |
# Turn the plain table into an sf point layer. `coords` names the x column
# first (lon) and the y column second (lat); crs 4326 is the EPSG code for
# WGS 84 longitude/latitude.
uni_sf <- st_as_sf(uni_df, coords = c("lon", "lat"), crs = 4326)
uni_sf |> mapview(label = 'university')
Reading layer `uni-1200-sf' from data source
`/Users/toyuan/dohnanyi/data/uni-1200-sf.gpkg' using driver `GPKG'
Simple feature collection with 5 features and 4 fields
Geometry type: POINT
Dimension: XY
Bounding box: xmin: -1.2576 ymin: 40.7711 xmax: 14.7905 ymax: 51.752
Geodetic CRS: WGS 84
dplyr for Exploratory Data Analysis
dplyr
|>
Import Data: Challenged Books from 2000 to 2010
| title | book_id | author | date | year | removed | explicit | antifamily | occult | language | lgbtq | violent | state | political_value_index | median_income | hs_grad_rate | college_grad_rate |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| House of the Spirits, The | 927 | Allende, Isabel | 2005-04-01 | 2005 | 0 | 1 | 0 | 1 | 1 | 1 | 1 | AK | -13.4 | 15707.5 | 8.738042 | 0.6762701 |
| It’s Not the Stork!: A Book About Girls, Boys, Babies and Bodies | 1024 | Harris, Robie | 2008-02-06 | 2008 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | AK | -13.4 | 15707.5 | 8.738042 | 0.6762701 |
| King Stork | 1087 | Pyle, Howard | 2008-10-02 | 2008 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | AK | -13.4 | 15707.5 | 8.738042 | 0.6762701 |
| How They Met and Other Stories | 936 | Levithan, David | 2008-10-05 | 2008 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | AK | -13.4 | 15707.5 | 8.738042 | 0.6762701 |
| Ghost in the Shell | 764 | Masamune, Shirow | 2008-10-02 | 2008 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | AK | -13.4 | 15707.5 | 8.738042 | 0.6762701 |
| King Stork | 1087 | Pyle, Howard | 2003-09-13 | 2003 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | AK | -13.4 | 15707.5 | 8.738042 | 0.6762701 |
Tidy Data and Data Cleaning (next week)
title book_id author date
Length:931 143 : 17 Length:931 Min. :2000-01-01
Class :character 868 : 12 Class :character 1st Qu.:2002-11-16
Mode :character 1849 : 10 Mode :character Median :2006-02-10
1025 : 9 Mean :2005-12-16
1083 : 8 3rd Qu.:2009-03-18
399 : 7 Max. :2010-09-09
(Other):868 NA's :11
year removed explicit antifamily occult language lgbtq violent
Min. :2000 0:714 0:632 0:891 0:895 0:694 0:842 0:797
1st Qu.:2002 1:217 1:299 1: 40 1: 36 1:237 1: 89 1:134
Median :2006
Mean :2005
3rd Qu.:2009
Max. :2010
NA's :11
state political_value_index median_income hs_grad_rate
Length:931 Min. :-20.2000 Min. :-8466 Min. :-6.662
Class :character 1st Qu.: -1.8000 1st Qu.: 1274 1st Qu.: 1.038
Mode :character Median : 2.0000 Median : 4218 Median : 2.338
Mean : 0.0304 Mean : 4530 Mean : 2.833
3rd Qu.: 4.0000 3rd Qu.: 9908 3rd Qu.: 5.538
Max. : 13.4000 Max. :19936 Max. : 8.738
college_grad_rate
Min. :-9.2237
1st Qu.:-2.0237
Median : 0.2763
Mean : 0.6043
3rd Qu.: 2.5763
Max. : 9.1763
Dimension: (row x column)
Summary statistics:
title book_id author date
Length:920 143 : 17 Length:920 Min. :2000-01-01
Class :character 868 : 12 Class :character 1st Qu.:2002-11-16
Mode :character 1849 : 10 Mode :character Median :2006-02-10
1025 : 9 Mean :2005-12-16
1083 : 8 3rd Qu.:2009-03-18
399 : 7 Max. :2010-09-09
(Other):857
year removed explicit antifamily occult language lgbtq violent
Min. :2000 0:706 0:628 0:880 0:884 0:686 0:832 0:788
1st Qu.:2002 1:214 1:292 1: 40 1: 36 1:234 1: 88 1:132
Median :2006
Mean :2005
3rd Qu.:2009
Max. :2010
state political_value_index median_income hs_grad_rate
Length:920 Min. :-20.20000 Min. :-8466 Min. :-6.662
Class :character 1st Qu.: -2.12500 1st Qu.: 1274 1st Qu.: 1.038
Mode :character Median : 2.00000 Median : 4218 Median : 2.338
Mean : 0.01326 Mean : 4522 Mean : 2.808
3rd Qu.: 4.00000 3rd Qu.: 9908 3rd Qu.: 5.538
Max. : 13.40000 Max. :19936 Max. : 8.738
college_grad_rate
Min. :-9.2237
1st Qu.:-1.7237
Median : 0.2763
Mean : 0.6042
3rd Qu.: 2.5763
Max. : 9.1763
dplyr functions: mutate()
mutate(): creating new variables (columns) from existing variables
count()
dplyr functions: group_by()
dplyr functions: group_by()
# One row per year: how many challenges were recorded, how many of them
# have removed == 1, and that share expressed as a percentage.
# Later arguments of summarize() may refer to columns created by earlier
# ones, which is why removed_pct can use the two counts directly.
book_challenge_df |>
  group_by(year) |>
  summarize(
    total_count   = n(),
    removed_count = sum(removed == 1),
    removed_pct   = 100 * (removed_count / total_count)
  )
| year | total_count | removed_count | removed_pct |
|---|---|---|---|
| 2000 | 71 | 26 | 36.61972 |
| 2001 | 111 | 26 | 23.42342 |
| 2002 | 52 | 16 | 30.76923 |
| 2003 | 88 | 14 | 15.90909 |
| 2004 | 83 | 20 | 24.09639 |
| 2005 | 52 | 15 | 28.84615 |
| 2006 | 54 | 9 | 16.66667 |
| 2007 | 38 | 15 | 39.47368 |
| 2008 | 122 | 37 | 30.32787 |
| 2009 | 197 | 22 | 11.16751 |
| 2010 | 52 | 14 | 26.92308 |
# A tibble: 11 × 4
year total_count removed_count removed_pct
<dbl> <int> <int> <dbl>
1 2000 71 26 36.6
2 2001 111 26 23.4
3 2002 52 16 30.8
4 2003 88 14 15.9
5 2004 83 20 24.1
6 2005 52 15 28.8
7 2006 54 9 16.7
8 2007 38 15 39.5
9 2008 122 37 30.3
10 2009 197 22 11.2
11 2010 52 14 26.9
# Base-R version of the per-year summary above, built with tapply().
# tapply() splits its first argument by the grouping vector (here: year)
# and applies the function to each piece; the result is named by group.

# Number of challenge records in each year.
total_by_year <- tapply(
  book_challenge_df$removed,
  book_challenge_df$year,
  length
)
# Number of records with removed == 1 in each year (sum of a logical
# vector counts its TRUE values).
removed_by_year <- tapply(
  book_challenge_df$removed == 1,
  book_challenge_df$year,
  sum
)

# Plain unnamed vectors, as in the original walkthrough.
total_count <- as.vector(total_by_year)
removed_count <- as.vector(removed_by_year)

year_summary <- data.frame(
  # Take the year labels from the groups tapply() actually produced rather
  # than hard-coding 2000:2010, so the labels stay aligned with the counts
  # even if a year is absent from the data.
  year = as.integer(names(total_by_year)),
  total_count = total_count,
  removed_count = removed_count,
  removed_pct = (removed_count / total_count) * 100
)
year_summary
# Count challenge records per state, carrying the state-level covariates
# along as grouping columns so they appear in the result.
# NOTE(review): assumes state_level_vars is a character vector of column
# names (defined elsewhere) - confirm; all_of() errors on unknown names.
# group_by_at() is superseded in favour of across(); .groups = "drop"
# returns an ungrouped tibble and silences the regrouping message.
state_summary <- book_challenge_df |>
  group_by(across(all_of(state_level_vars))) |>
  summarize(count = n(), .groups = "drop")
state_summary
| state | political_value_index | median_income | hs_grad_rate | college_grad_rate | count |
|---|---|---|---|---|---|
| AK | -13.4 | 15707.5 | 8.7380421 | 0.6762701 | 9 |
| AL | -13.2 | -5054.0 | -4.2619579 | -5.0237299 | 10 |
| AR | -8.8 | -6258.5 | -4.2619579 | -7.3237299 | 5 |
| AZ | -6.1 | 3490.5 | 1.4380421 | -0.5237299 | 12 |
| CA | 7.4 | 10119.0 | -2.7619579 | 2.5762701 | 50 |
| CO | -0.2 | 10346.5 | 7.3380421 | 8.6762701 | 80 |
| CT | 7.1 | 16247.5 | 4.4380421 | 7.3762701 | 6 |
| DE | 7.0 | 9167.5 | 3.0380421 | 0.9762701 | 2 |
| FL | -1.8 | 372.0 | 0.3380421 | -1.7237299 | 26 |
| GA | -6.8 | 2239.5 | -0.9619579 | 0.2762701 | 13 |
| IA | 1.0 | 3921.5 | 6.5380421 | -2.8237299 | 13 |
| ID | -17.4 | 3194.0 | 5.1380421 | -2.3237299 | 14 |
| IL | 7.7 | 6489.5 | 1.8380421 | 2.0762701 | 39 |
| IN | -6.2 | 1086.5 | 2.5380421 | -4.6237299 | 20 |
| KS | -11.5 | 144.5 | 6.4380421 | 1.7762701 | 13 |
| KY | -10.4 | -5880.5 | -5.4619579 | -6.9237299 | 13 |
| LA | -9.7 | -5120.5 | -4.7619579 | -5.3237299 | 12 |
| MA | 11.7 | 14048.5 | 5.2380421 | 9.1762701 | 8 |
| MD | 8.5 | 19404.5 | 4.2380421 | 7.3762701 | 5 |
| ME | 5.5 | 1335.5 | 5.8380421 | -1.1237299 | 3 |
| MI | 3.8 | 2966.0 | 3.8380421 | -2.2237299 | 34 |
| MN | 2.3 | 15378.0 | 8.3380421 | 3.3762701 | 17 |
| MO | -3.1 | 1279.5 | 1.7380421 | -2.4237299 | 11 |
| MS | -9.5 | -8466.5 | -6.6619579 | -7.1237299 | 1 |
| MT | -7.1 | -6482.0 | 7.6380421 | 0.3762701 | 7 |
| NC | -4.3 | -310.0 | -1.4619579 | -1.5237299 | 20 |
| ND | -10.4 | -813.0 | 4.3380421 | -2.0237299 | 6 |
| NE | -13.5 | 4927.5 | 7.0380421 | -0.3237299 | 2 |
| NH | 1.6 | 17303.0 | 7.8380421 | 4.6762701 | 7 |
| NJ | 4.4 | 19935.5 | 2.5380421 | 5.7762701 | 11 |
| NM | 2.4 | -2401.5 | -0.6619579 | -0.5237299 | 3 |
| NY | 10.2 | 5007.5 | -0.4619579 | 3.3762701 | 24 |
| OH | -0.7 | 2469.0 | 3.4380421 | -2.9237299 | 29 |
| OK | -16.9 | -3087.5 | 1.0380421 | -3.7237299 | 23 |
| OR | 4.0 | 1274.5 | 5.5380421 | 1.0762701 | 118 |
| PA | 2.0 | 4218.0 | 2.3380421 | -1.6237299 | 147 |
| RI | 11.2 | 8141.0 | -1.5619579 | 1.5762701 | 3 |
| SC | -7.8 | -2191.5 | -3.2619579 | -3.6237299 | 15 |
| SD | -8.9 | 785.0 | 5.0380421 | -2.5237299 | 3 |
| TN | -8.7 | -2995.5 | -3.6619579 | -4.4237299 | 13 |
| UT | -20.2 | 12735.5 | 8.1380421 | 2.0762701 | 1 |
| VA | -1.7 | 11296.0 | 1.9380421 | 5.4762701 | 33 |
| VT | 13.4 | 8467.0 | 6.8380421 | 5.3762701 | 13 |
| WA | 5.0 | 9907.5 | 7.5380421 | 3.6762701 | 9 |
| WI | 2.4 | 4234.5 | 5.5380421 | -1.6237299 | 10 |
| WV | -7.9 | -7290.0 | -4.3619579 | -9.2237299 | 3 |
| WY | -19.7 | 4081.5 | 8.3380421 | -2.1237299 | 4 |
# State-level polygons for the United States, reduced to the postal code,
# name, GADM level and region attributes.
# NOTE(review): dim() below reports 5 columns although only 4 are selected;
# presumably the sf geometry column is retained automatically - confirm.
state_sf <- ne_states(country = "United States of America") |>
  select(postal, gn_name, gadm_level, region)
state_sf |> dim() |> print()
[1] 51 5
| postal | gn_name | gadm_level | region | political_value_index | median_income | hs_grad_rate | college_grad_rate | count | geometry |
|---|---|---|---|---|---|---|---|---|---|
| WA | Washington | 1 | West | 5.0 | 9907.5 | 7.538042 | 3.6762701 | 9 | MULTIPOLYGON (((-122.753 48… |
| ID | Idaho | 1 | West | -17.4 | 3194.0 | 5.138042 | -2.3237299 | 14 | MULTIPOLYGON (((-117.0382 4… |
| MT | Montana | 1 | West | -7.1 | -6482.0 | 7.638042 | 0.3762701 | 7 | MULTIPOLYGON (((-116.0482 4… |
| ND | North Dakota | 1 | Midwest | -10.4 | -813.0 | 4.338042 | -2.0237299 | 6 | MULTIPOLYGON (((-104.0476 4… |
| MN | Minnesota | 1 | Midwest | 2.3 | 15378.0 | 8.338042 | 3.3762701 | 17 | MULTIPOLYGON (((-97.22609 4… |
| MI | Michigan | 1 | Midwest | 3.8 | 2966.0 | 3.838042 | -2.2237299 | 34 | MULTIPOLYGON (((-84.4913 46… |
| postal | gn_name | gadm_level | region | political_value_index | median_income | hs_grad_rate | college_grad_rate | count | geometry |
|---|---|---|---|---|---|---|---|---|---|
| TX | Texas | 1 | South | NA | NA | NA | NA | NA | MULTIPOLYGON (((-103.3115 2… |
| DC | District of Columbia | 1 | South | NA | NA | NA | NA | NA | MULTIPOLYGON (((-77.02293 3… |
| HI | Hawaii | 1 | West | NA | NA | NA | NA | NA | MULTIPOLYGON (((-154.8996 1… |
| NV | Nevada | 1 | West | NA | NA | NA | NA | NA | MULTIPOLYGON (((-114.0425 4… |
ggplot plot with more controls